mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-03 17:38:25 +02:00
radv: add a pass that selects either frag_coord_xy or pixel_coord, but not both
This is done to reduce the number of initialized PS VGPRs, which increases the PS wave launch rate. The pass will have more RADV-specific logic.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41689>
This commit is contained in:
parent
a5ba7694b5
commit
6cd04ca6e5
6 changed files with 135 additions and 5 deletions
|
|
@ -145,10 +145,12 @@ BEGIN_TEST(d3d11_derivs.bias)
|
|||
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
||||
pbld.add_vsfs(vs, fs);
|
||||
|
||||
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
|
||||
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias1:v[2] = p_startpgm
|
||||
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
|
||||
//>> v1: %bias2 = v_cvt_f32_u32 (kill)%bias1 dst_sel:dword src0_sel:uword0
|
||||
//>> v1: %bias3 = v_add_f32 0.5, (kill)%bias2
|
||||
//>> BB1
|
||||
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2d
|
||||
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias3 2d
|
||||
//>> BB2
|
||||
//>> BB6
|
||||
//>> p_end_linear_vgpr (kill)%wqm
|
||||
|
|
@ -279,11 +281,13 @@ BEGIN_TEST(d3d11_derivs.bias_array)
|
|||
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
||||
pbld.add_vsfs(vs, fs);
|
||||
|
||||
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
|
||||
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias1:v[2] = p_startpgm
|
||||
//>> v1: %layer = v_rndne_f32 (kill)%_
|
||||
//>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer
|
||||
//>> v1: %bias2 = v_cvt_f32_u32 (kill)%bias1 dst_sel:dword src0_sel:uword0
|
||||
//>> v1: %bias3 = v_add_f32 0.5, (kill)%bias2
|
||||
//>> BB1
|
||||
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2darray da
|
||||
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias3 2darray da
|
||||
//>> BB2
|
||||
//>> BB6
|
||||
//>> p_end_linear_vgpr (kill)%wqm
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ libradv_files = files(
|
|||
'nir/radv_nir_lower_immediate_samplers.c',
|
||||
'nir/radv_nir_lower_intrinsics_early.c',
|
||||
'nir/radv_nir_lower_io.c',
|
||||
'nir/radv_nir_lower_opt_fs_frag_pos.c',
|
||||
'nir/radv_nir_lower_primitive_shading_rate.c',
|
||||
'nir/radv_nir_lower_printf.c',
|
||||
'nir/radv_nir_lower_ray_queries.c',
|
||||
|
|
|
|||
|
|
@ -96,6 +96,8 @@ bool radv_nir_opt_tid_function(nir_shader *shader, const radv_nir_opt_tid_functi
|
|||
bool radv_nir_opt_fs_builtins(nir_shader *shader, const struct radv_graphics_state_key *gfx_state,
|
||||
unsigned vgt_outprim_type);
|
||||
|
||||
bool radv_nir_lower_opt_fs_frag_pos(nir_shader *shader, bool force_pixel_coord);
|
||||
|
||||
bool radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_shader_stage *stage);
|
||||
|
||||
|
|
|
|||
118
src/amd/vulkan/nir/radv_nir_lower_opt_fs_frag_pos.c
Normal file
118
src/amd/vulkan/nir/radv_nir_lower_opt_fs_frag_pos.c
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
/* Copyright © 2026 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/* Lower frag_coord_xy to pixel_coord if either force_pixel_coord=true or all uses of all instances
|
||||
* of frag_coord_xy ignore the fractional part, and lower pixel_coord to frag_coord_xy if both
|
||||
* pixel_coord and frag_coord_xy exist and frag_coord_xy has at least one use that doesn't ignore
|
||||
* the fractional part. At the end of the pass, only frag_coord_xy or pixel_coord can be present,
|
||||
* not both.
|
||||
*
|
||||
* sample_pos counts as a frag_coord_xy use and is lowered to frag_coord_xy here.
|
||||
*/
|
||||
|
||||
#include "nir_builder.h"
|
||||
#include "radv_nir.h"
|
||||
#include "radv_shader_info.h"
|
||||
|
||||
/* Shared state of the two intrinsic walks performed by radv_nir_lower_opt_fs_frag_pos. */
typedef struct {
   /* Facts recorded by gather_fs_frag_pos. */

   /* At least one load_frag_coord_xy is present. */
   bool has_frag_coord_xy;

   /* At least one load_frag_coord_xy or load_sample_pos has a use for which
    * nir_all_uses_of_float_are_integer() returned false.
    */
   bool has_frag_coord_xy_float_use;

   /* At least one load_pixel_coord is present. */
   bool has_pixel_coord;

   /* At least one load_sample_pos is present. */
   bool has_sample_pos;

   /* Decisions consumed by lower_fs_frag_pos. */

   /* Rewrite load_frag_coord_xy in terms of load_pixel_coord, and load_sample_pos to zero. */
   bool lower_to_pixel_coord;

   /* Rewrite load_pixel_coord in terms of load_frag_coord_xy. */
   bool lower_to_frag_coord_xy;
} opt_fs_frag_coord_and_pixel_coord_state;
|
||||
|
||||
static bool
|
||||
gather_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
opt_fs_frag_coord_and_pixel_coord_state *state = (opt_fs_frag_coord_and_pixel_coord_state *)data;
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_frag_coord:
|
||||
UNREACHABLE("only frag_coord_xy is expected");
|
||||
|
||||
case nir_intrinsic_load_frag_coord_xy:
|
||||
case nir_intrinsic_load_sample_pos:
|
||||
assert(!nir_def_is_unused(&intr->def));
|
||||
|
||||
if (!state->has_frag_coord_xy_float_use && !nir_all_uses_of_float_are_integer(&intr->def, 0x3))
|
||||
state->has_frag_coord_xy_float_use = true;
|
||||
|
||||
state->has_frag_coord_xy |= intr->intrinsic == nir_intrinsic_load_frag_coord_xy;
|
||||
state->has_sample_pos |= intr->intrinsic == nir_intrinsic_load_sample_pos;
|
||||
return false;
|
||||
|
||||
case nir_intrinsic_load_pixel_coord:
|
||||
state->has_pixel_coord = true;
|
||||
return false;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
opt_fs_frag_coord_and_pixel_coord_state *state = (opt_fs_frag_coord_and_pixel_coord_state *)data;
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_frag_coord_xy:
|
||||
if (state->lower_to_pixel_coord) {
|
||||
nir_def_replace(&intr->def, nir_fadd_imm(b, nir_u2f32(b, nir_load_pixel_coord(b)), 0.5));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case nir_intrinsic_load_sample_pos:
|
||||
if (state->lower_to_pixel_coord) {
|
||||
/* This is unlikely and only possible with integer use. */
|
||||
nir_def_replace(&intr->def, nir_imm_vec2(b, 0, 0));
|
||||
} else {
|
||||
nir_def_replace(&intr->def, nir_ffract(b, nir_load_frag_coord_xy(b)));
|
||||
}
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_load_pixel_coord:
|
||||
if (state->lower_to_frag_coord_xy) {
|
||||
nir_def_replace(&intr->def, nir_f2u16(b, nir_load_frag_coord_xy(b)));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
radv_nir_lower_opt_fs_frag_pos(nir_shader *shader, bool force_pixel_coord)
|
||||
{
|
||||
if (force_pixel_coord) {
|
||||
opt_fs_frag_coord_and_pixel_coord_state state = {
|
||||
.lower_to_pixel_coord = true,
|
||||
};
|
||||
|
||||
return nir_shader_intrinsics_pass(shader, lower_fs_frag_pos, nir_metadata_control_flow, &state);
|
||||
} else {
|
||||
opt_fs_frag_coord_and_pixel_coord_state state = {0};
|
||||
|
||||
nir_shader_intrinsics_pass(shader, gather_fs_frag_pos, nir_metadata_all, &state);
|
||||
state.lower_to_pixel_coord =
|
||||
(state.has_frag_coord_xy || state.has_sample_pos) && !state.has_frag_coord_xy_float_use;
|
||||
state.lower_to_frag_coord_xy =
|
||||
(state.has_pixel_coord || state.has_sample_pos) && state.has_frag_coord_xy_float_use;
|
||||
|
||||
if (!state.lower_to_pixel_coord && !state.lower_to_frag_coord_xy)
|
||||
return false;
|
||||
|
||||
return nir_shader_intrinsics_pass(shader, lower_fs_frag_pos, nir_metadata_control_flow, &state);
|
||||
}
|
||||
}
|
||||
|
|
@ -1714,6 +1714,7 @@ radv_generate_graphics_state_key(const struct radv_compiler_info *compiler_info,
|
|||
}
|
||||
|
||||
key.ps.force_vrs_enabled = compiler_info->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
|
||||
key.vrs_may_be_enabled = radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled;
|
||||
|
||||
if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && compiler_info->ac->has_vrs_frag_pos_z_bug)
|
||||
key.adjust_frag_coord_z = true;
|
||||
|
|
@ -2563,7 +2564,10 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st
|
|||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_cse);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
|
||||
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_opt_fs_frag_pos,
|
||||
!gfx_state->vrs_may_be_enabled && !gfx_state->ms.sample_shading_enable &&
|
||||
!stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading);
|
||||
|
||||
/* Lower the view index to map on the layer. */
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_view_index);
|
||||
|
|
|
|||
|
|
@ -133,6 +133,7 @@ struct radv_graphics_state_key {
|
|||
uint32_t lib_flags : 4; /* VkGraphicsPipelineLibraryFlagBitsEXT */
|
||||
|
||||
uint32_t has_multiview_view_index : 1;
|
||||
uint32_t vrs_may_be_enabled : 1;
|
||||
uint32_t adjust_frag_coord_z : 1;
|
||||
uint32_t dynamic_rasterization_samples : 1;
|
||||
uint32_t dynamic_provoking_vtx_mode : 1;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue