mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
anv/brw: shrink FS varying payload
We're currently allocating payload spots for 3 fields already delivered somewhere else in the payload.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34109>
This commit is contained in:
parent
c467444670
commit
62d2e323ba
4 changed files with 46 additions and 62 deletions
|
|
@ -4682,7 +4682,7 @@ iris_compute_first_urb_slot_required(uint64_t inputs_read,
|
|||
const struct intel_vue_map *prev_stage_vue_map)
|
||||
{
|
||||
#if GFX_VER >= 9
|
||||
return brw_compute_first_urb_slot_required(inputs_read, prev_stage_vue_map);
|
||||
return brw_compute_first_fs_urb_slot_required(inputs_read, prev_stage_vue_map);
|
||||
#else
|
||||
return elk_compute_first_urb_slot_required(inputs_read, prev_stage_vue_map);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -791,18 +791,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
} else {
|
||||
assert(!nir->info.per_primitive_inputs);
|
||||
|
||||
uint64_t vue_header_bits =
|
||||
VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
|
||||
const uint64_t vue_header_bits = BRW_VUE_HEADER_VARYING_MASK;
|
||||
|
||||
uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK;
|
||||
|
||||
/* VUE header fields all live in the same URB slot, so we pass them
|
||||
* as a single FS input attribute. We want to only count them once.
|
||||
*/
|
||||
if (inputs_read & vue_header_bits) {
|
||||
unique_fs_attrs &= ~vue_header_bits;
|
||||
unique_fs_attrs |= VARYING_BIT_PSIZ;
|
||||
}
|
||||
uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK & ~vue_header_bits;
|
||||
|
||||
if (util_bitcount64(unique_fs_attrs) <= 16) {
|
||||
/* The SF/SBE pipeline stage can do arbitrary rearrangement of the
|
||||
|
|
@ -813,20 +804,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
* fragment shader won't take up valuable register space, and (b) we
|
||||
* won't have to recompile the fragment shader if it gets paired with
|
||||
* a different vertex (or geometry) shader.
|
||||
*
|
||||
* VUE header fields share the same FS input attribute.
|
||||
*/
|
||||
if (inputs_read & vue_header_bits) {
|
||||
if (inputs_read & VARYING_BIT_PSIZ)
|
||||
prog_data->urb_setup[VARYING_SLOT_PSIZ] = urb_next;
|
||||
if (inputs_read & VARYING_BIT_LAYER)
|
||||
prog_data->urb_setup[VARYING_SLOT_LAYER] = urb_next;
|
||||
if (inputs_read & VARYING_BIT_VIEWPORT)
|
||||
prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = urb_next;
|
||||
|
||||
urb_next++;
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
|
||||
if (inputs_read & BRW_FS_VARYING_INPUT_MASK & ~vue_header_bits &
|
||||
BITFIELD64_BIT(i)) {
|
||||
|
|
@ -850,8 +828,8 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
nir->info.separate_shader, 1);
|
||||
|
||||
int first_slot =
|
||||
brw_compute_first_urb_slot_required(inputs_read,
|
||||
&prev_stage_vue_map);
|
||||
brw_compute_first_fs_urb_slot_required(unique_fs_attrs,
|
||||
&prev_stage_vue_map);
|
||||
|
||||
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
|
||||
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
|
||||
|
|
@ -1908,3 +1886,29 @@ brw_print_fs_urb_setup(FILE *fp, const struct brw_wm_prog_data *prog_data)
|
|||
MESA_SHADER_FRAGMENT));
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the first slot of the previous stage's VUE map that the fragment
 * shader actually reads from the URB.
 *
 * inputs_read is the bitmask of varyings read by the fragment shader;
 * VUE header varyings are masked out before the search. Returns the index
 * of the first matching slot passed through ROUND_DOWN_TO(i, 2), or 0 when
 * no slot of prev_stage_vue_map matches.
 */
extern "C" int
|
||||
brw_compute_first_fs_urb_slot_required(uint64_t inputs_read,
|
||||
const struct intel_vue_map *prev_stage_vue_map)
|
||||
{
|
||||
/* The header slots are irrelevant for the URB varying slots. They are
|
||||
* delivered somewhere else in the thread payload.
|
||||
*
|
||||
* For example on DG2:
|
||||
* - PRIMITIVE_SHADING_RATE : R1.0, ActualCoarsePixelShadingSize.(X|Y)
|
||||
* - LAYER : R1.1, Render Target Array Index
|
||||
* - VIEWPORT : R1.1, Viewport Index
|
||||
* - PSIZ : not available in fragment shaders
|
||||
*/
|
||||
inputs_read &= ~BRW_VUE_HEADER_VARYING_MASK;
|
||||
|
||||
/* Walk the slots in order; the first slot holding a varying that is still
 * set in inputs_read decides the result.
 */
for (int i = 0; i < prev_stage_vue_map->num_slots; i++) {
|
||||
int varying = prev_stage_vue_map->slot_to_varying[i];
|
||||
/* Padding slots and slots whose varying is <= 0 never match. */
if (varying != BRW_VARYING_SLOT_PAD && varying > 0 &&
|
||||
(inputs_read & BITFIELD64_BIT(varying)) != 0) {
|
||||
return ROUND_DOWN_TO(i, 2);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1021,6 +1021,13 @@ typedef enum
|
|||
BRW_VARYING_SLOT_COUNT
|
||||
} brw_varying_slot;
|
||||
|
||||
|
||||
/* Varyings that are carried in the VUE header instead of occupying a URB
 * varying slot of their own. The fragment shader URB slot computations mask
 * these bits out of the inputs-read bitmask.
 */
#define BRW_VUE_HEADER_VARYING_MASK \
|
||||
(VARYING_BIT_VIEWPORT | \
|
||||
VARYING_BIT_LAYER | \
|
||||
VARYING_BIT_PRIMITIVE_SHADING_RATE | \
|
||||
VARYING_BIT_PSIZ)
|
||||
|
||||
/**
|
||||
* Bitmask indicating which fragment shader inputs represent varyings (and
|
||||
* hence have to be delivered to the fragment shader by the SF/SBE stage).
|
||||
|
|
@ -1605,31 +1612,13 @@ brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
|
|||
* that is used in the next stage. We do this by testing the varying slots in
|
||||
* the previous stage's vue map against the inputs read in the next stage.
|
||||
*
|
||||
* Note that:
|
||||
*
|
||||
* - Each URB offset contains two varying slots and we can only skip a
|
||||
* full offset if both slots are unused, so the value we return here is always
|
||||
* rounded down to the closest multiple of two.
|
||||
*
|
||||
* - gl_Layer and gl_ViewportIndex don't have their own varying slots, they are
|
||||
* part of the vue header, so if these are read we can't skip anything.
|
||||
* Note that each URB offset contains two varying slots and we can only skip a
|
||||
* full offset if both slots are unused, so the value we return here is always
|
||||
* rounded down to the closest multiple of two.
|
||||
*/
|
||||
static inline int
|
||||
brw_compute_first_urb_slot_required(uint64_t inputs_read,
|
||||
const struct intel_vue_map *prev_stage_vue_map)
|
||||
{
|
||||
if ((inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PRIMITIVE_SHADING_RATE)) == 0) {
|
||||
for (int i = 0; i < prev_stage_vue_map->num_slots; i++) {
|
||||
int varying = prev_stage_vue_map->slot_to_varying[i];
|
||||
if (varying != BRW_VARYING_SLOT_PAD && varying > 0 &&
|
||||
(inputs_read & BITFIELD64_BIT(varying)) != 0) {
|
||||
return ROUND_DOWN_TO(i, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
int
|
||||
brw_compute_first_fs_urb_slot_required(uint64_t inputs_read,
|
||||
const struct intel_vue_map *prev_stage_vue_map);
|
||||
|
||||
/* From InlineData in 3DSTATE_TASK_SHADER_DATA and 3DSTATE_MESH_SHADER_DATA. */
|
||||
#define BRW_TASK_MESH_INLINE_DATA_SIZE_DW 8
|
||||
|
|
|
|||
|
|
@ -685,8 +685,8 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
&anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
|
||||
|
||||
int first_slot =
|
||||
brw_compute_first_urb_slot_required(wm_prog_data->inputs,
|
||||
fs_input_map);
|
||||
brw_compute_first_fs_urb_slot_required(wm_prog_data->inputs,
|
||||
fs_input_map);
|
||||
assert(first_slot % 2 == 0);
|
||||
unsigned urb_entry_read_offset = first_slot / 2;
|
||||
int max_source_attr = 0;
|
||||
|
|
@ -696,15 +696,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
|
||||
assert(0 <= input_index);
|
||||
|
||||
/* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
|
||||
* VUE header
|
||||
*/
|
||||
if (attr == VARYING_SLOT_VIEWPORT ||
|
||||
attr == VARYING_SLOT_LAYER ||
|
||||
attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (attr == VARYING_SLOT_PNTC) {
|
||||
sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
|
||||
continue;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue