anv/brw: shrink FS varying payload

We're currently allocating payload spots for 3 fields already
delivered somewhere else in the payload.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34109>
This commit is contained in:
Lionel Landwerlin 2025-03-19 12:11:53 +02:00 committed by Marge Bot
parent c467444670
commit 62d2e323ba
4 changed files with 46 additions and 62 deletions

View file

@ -4682,7 +4682,7 @@ iris_compute_first_urb_slot_required(uint64_t inputs_read,
const struct intel_vue_map *prev_stage_vue_map)
{
#if GFX_VER >= 9
return brw_compute_first_urb_slot_required(inputs_read, prev_stage_vue_map);
return brw_compute_first_fs_urb_slot_required(inputs_read, prev_stage_vue_map);
#else
return elk_compute_first_urb_slot_required(inputs_read, prev_stage_vue_map);
#endif

View file

@ -791,18 +791,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
} else {
assert(!nir->info.per_primitive_inputs);
uint64_t vue_header_bits =
VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
const uint64_t vue_header_bits = BRW_VUE_HEADER_VARYING_MASK;
uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK;
/* VUE header fields all live in the same URB slot, so we pass them
* as a single FS input attribute. We want to only count them once.
*/
if (inputs_read & vue_header_bits) {
unique_fs_attrs &= ~vue_header_bits;
unique_fs_attrs |= VARYING_BIT_PSIZ;
}
uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK & ~vue_header_bits;
if (util_bitcount64(unique_fs_attrs) <= 16) {
/* The SF/SBE pipeline stage can do arbitrary rearrangement of the
@ -813,20 +804,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
* fragment shader won't take up valuable register space, and (b) we
* won't have to recompile the fragment shader if it gets paired with
* a different vertex (or geometry) shader.
*
* VUE header fields share the same FS input attribute.
*/
if (inputs_read & vue_header_bits) {
if (inputs_read & VARYING_BIT_PSIZ)
prog_data->urb_setup[VARYING_SLOT_PSIZ] = urb_next;
if (inputs_read & VARYING_BIT_LAYER)
prog_data->urb_setup[VARYING_SLOT_LAYER] = urb_next;
if (inputs_read & VARYING_BIT_VIEWPORT)
prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = urb_next;
urb_next++;
}
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
if (inputs_read & BRW_FS_VARYING_INPUT_MASK & ~vue_header_bits &
BITFIELD64_BIT(i)) {
@ -850,8 +828,8 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
nir->info.separate_shader, 1);
int first_slot =
brw_compute_first_urb_slot_required(inputs_read,
&prev_stage_vue_map);
brw_compute_first_fs_urb_slot_required(unique_fs_attrs,
&prev_stage_vue_map);
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
@ -1908,3 +1886,29 @@ brw_print_fs_urb_setup(FILE *fp, const struct brw_wm_prog_data *prog_data)
MESA_SHADER_FRAGMENT));
}
}
/**
 * Find the first slot of the previous stage's VUE map that the fragment
 * shader actually needs to read from the URB.
 *
 * \param inputs_read         bitmask of varyings read by the fragment shader
 * \param prev_stage_vue_map  VUE map produced by the previous stage
 *
 * \return index of the first required slot, rounded down to a multiple of
 *         two, or 0 when no slot of the map matches a read varying.
 */
extern "C" int
brw_compute_first_fs_urb_slot_required(uint64_t inputs_read,
                                       const struct intel_vue_map *prev_stage_vue_map)
{
   /* VUE header varyings never constrain the URB read offset: the fragment
    * shader gets them from other parts of the thread payload.
    *
    * For example on DG2:
    *   - PRIMITIVE_SHADING_RATE : R1.0, ActualCoarsePixelShadingSize.(X|Y)
    *   - LAYER                  : R1.1, Render Target Array Index
    *   - VIEWPORT               : R1.1, Viewport Index
    *   - PSIZ                   : not available in fragment shaders
    */
   const uint64_t urb_inputs = inputs_read & ~BRW_VUE_HEADER_VARYING_MASK;
   const int slot_count = prev_stage_vue_map->num_slots;

   for (int slot = 0; slot < slot_count; slot++) {
      const int varying = prev_stage_vue_map->slot_to_varying[slot];

      /* Padding slots and non-positive entries never match an input. */
      if (varying == BRW_VARYING_SLOT_PAD || varying <= 0)
         continue;

      /* First slot holding a varying the shader reads: this is where the
       * URB read has to start.
       */
      if ((urb_inputs & BITFIELD64_BIT(varying)) != 0)
         return ROUND_DOWN_TO(slot, 2);
   }

   return 0;
}

View file

@ -1021,6 +1021,13 @@ typedef enum
BRW_VARYING_SLOT_COUNT
} brw_varying_slot;
/**
 * Bitmask of the varyings stored in the VUE header (viewport index, layer,
 * primitive shading rate and point size).
 */
#define BRW_VUE_HEADER_VARYING_MASK \
(VARYING_BIT_VIEWPORT | \
VARYING_BIT_LAYER | \
VARYING_BIT_PRIMITIVE_SHADING_RATE | \
VARYING_BIT_PSIZ)
/**
* Bitmask indicating which fragment shader inputs represent varyings (and
* hence have to be delivered to the fragment shader by the SF/SBE stage).
@ -1605,31 +1612,13 @@ brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
* that is used in the next stage. We do this by testing the varying slots in
* the previous stage's vue map against the inputs read in the next stage.
*
* Note that:
*
* - Each URB offset contains two varying slots and we can only skip a
* full offset if both slots are unused, so the value we return here is always
* rounded down to the closest multiple of two.
*
* - gl_Layer and gl_ViewportIndex don't have their own varying slots, they are
* part of the vue header, so if these are read we can't skip anything.
* Note that each URB offset contains two varying slots and we can only skip a
* full offset if both slots are unused, so the value we return here is always
* rounded down to the closest multiple of two.
*/
static inline int
brw_compute_first_urb_slot_required(uint64_t inputs_read,
                                    const struct intel_vue_map *prev_stage_vue_map)
{
   /* Layer, viewport and primitive shading rate are part of the VUE header,
    * so when any of them is read nothing can be skipped.
    */
   const uint64_t header_reads = VARYING_BIT_LAYER |
                                 VARYING_BIT_VIEWPORT |
                                 VARYING_BIT_PRIMITIVE_SHADING_RATE;
   if ((inputs_read & header_reads) != 0)
      return 0;

   const int slot_count = prev_stage_vue_map->num_slots;
   for (int slot = 0; slot < slot_count; slot++) {
      const int varying = prev_stage_vue_map->slot_to_varying[slot];

      /* Padding slots and non-positive entries never match an input. */
      if (varying == BRW_VARYING_SLOT_PAD || varying <= 0)
         continue;

      /* Return the first slot that is read, rounded down to a multiple
       * of two.
       */
      if ((inputs_read & BITFIELD64_BIT(varying)) != 0)
         return ROUND_DOWN_TO(slot, 2);
   }

   return 0;
}
int
brw_compute_first_fs_urb_slot_required(uint64_t inputs_read,
const struct intel_vue_map *prev_stage_vue_map);
/* From InlineData in 3DSTATE_TASK_SHADER_DATA and 3DSTATE_MESH_SHADER_DATA. */
#define BRW_TASK_MESH_INLINE_DATA_SIZE_DW 8

View file

@ -685,8 +685,8 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
&anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
int first_slot =
brw_compute_first_urb_slot_required(wm_prog_data->inputs,
fs_input_map);
brw_compute_first_fs_urb_slot_required(wm_prog_data->inputs,
fs_input_map);
assert(first_slot % 2 == 0);
unsigned urb_entry_read_offset = first_slot / 2;
int max_source_attr = 0;
@ -696,15 +696,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
assert(0 <= input_index);
/* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
* VUE header
*/
if (attr == VARYING_SLOT_VIEWPORT ||
attr == VARYING_SLOT_LAYER ||
attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
continue;
}
if (attr == VARYING_SLOT_PNTC) {
sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
continue;