mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 18:40:13 +01:00
intel/fs: handle URB setup for fast linked mesh pipelines
Up until now, the mesh pipeline assumed it would be always linked to the
fragment shader, and so the calculated MUE map would always be
available.
That is not the case for fast linked pipeline libraries, so the URB
setup needs to account for this. We do this by replicating what's done
for non-mesh pipelines, defining the URB based on the FS inputs, and
always assuming they will be laid out in order of varying number, except
that we also account for per-primitive attributes.
Fixes all GPL using tests under dEQP-VK.mesh_shader.ext.smoke.*
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
(cherry picked from commit 4eddeea7bf)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25188>
This commit is contained in:
parent
dfb38ea10c
commit
44ea732eed
1 changed files with 111 additions and 77 deletions
|
|
@ -1773,7 +1773,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
nir->info.inputs_read & ~nir->info.per_primitive_inputs;
|
||||
|
||||
/* Figure out where each of the incoming setup attributes lands. */
|
||||
if (mue_map) {
|
||||
if (key->mesh_input != BRW_NEVER) {
|
||||
/* Per-Primitive Attributes are laid out by Hardware before the regular
|
||||
* attributes, so order them like this to make easy later to map setup
|
||||
* into real HW registers.
|
||||
|
|
@ -1782,9 +1782,6 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
uint64_t per_prim_inputs_read =
|
||||
nir->info.inputs_read & nir->info.per_primitive_inputs;
|
||||
|
||||
unsigned per_prim_start_dw = mue_map->per_primitive_start_dw;
|
||||
unsigned per_prim_size_dw = mue_map->per_primitive_pitch_dw;
|
||||
|
||||
/* In Mesh, PRIMITIVE_SHADING_RATE, VIEWPORT and LAYER slots
|
||||
* are always at the beginning, because they come from MUE
|
||||
* Primitive Header, not Per-Primitive Attributes.
|
||||
|
|
@ -1792,47 +1789,68 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
const uint64_t primitive_header_bits = VARYING_BIT_VIEWPORT |
|
||||
VARYING_BIT_LAYER |
|
||||
VARYING_BIT_PRIMITIVE_SHADING_RATE;
|
||||
bool reads_header = (per_prim_inputs_read & primitive_header_bits) != 0;
|
||||
|
||||
if (reads_header || mue_map->user_data_in_primitive_header) {
|
||||
/* Primitive Shading Rate, Layer and Viewport live in the same
|
||||
* 4-dwords slot (psr is dword 0, layer is dword 1, and viewport
|
||||
* is dword 2).
|
||||
*/
|
||||
if (per_prim_inputs_read & VARYING_BIT_PRIMITIVE_SHADING_RATE)
|
||||
prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] = 0;
|
||||
if (mue_map) {
|
||||
unsigned per_prim_start_dw = mue_map->per_primitive_start_dw;
|
||||
unsigned per_prim_size_dw = mue_map->per_primitive_pitch_dw;
|
||||
|
||||
if (per_prim_inputs_read & VARYING_BIT_LAYER)
|
||||
prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
|
||||
bool reads_header = (per_prim_inputs_read & primitive_header_bits) != 0;
|
||||
|
||||
if (per_prim_inputs_read & VARYING_BIT_VIEWPORT)
|
||||
prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = 0;
|
||||
if (reads_header || mue_map->user_data_in_primitive_header) {
|
||||
/* Primitive Shading Rate, Layer and Viewport live in the same
|
||||
* 4-dwords slot (psr is dword 0, layer is dword 1, and viewport
|
||||
* is dword 2).
|
||||
*/
|
||||
if (per_prim_inputs_read & VARYING_BIT_PRIMITIVE_SHADING_RATE)
|
||||
prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] = 0;
|
||||
|
||||
per_prim_inputs_read &= ~primitive_header_bits;
|
||||
if (per_prim_inputs_read & VARYING_BIT_LAYER)
|
||||
prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
|
||||
|
||||
if (per_prim_inputs_read & VARYING_BIT_VIEWPORT)
|
||||
prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = 0;
|
||||
|
||||
per_prim_inputs_read &= ~primitive_header_bits;
|
||||
} else {
|
||||
/* If fs doesn't need primitive header, then it won't be made
|
||||
* available through SBE_MESH, so we have to skip them when
|
||||
* calculating offset from start of per-prim data.
|
||||
*/
|
||||
per_prim_start_dw += mue_map->per_primitive_header_size_dw;
|
||||
per_prim_size_dw -= mue_map->per_primitive_header_size_dw;
|
||||
}
|
||||
|
||||
u_foreach_bit64(i, per_prim_inputs_read) {
|
||||
int start = mue_map->start_dw[i];
|
||||
|
||||
assert(start >= 0);
|
||||
assert(mue_map->len_dw[i] > 0);
|
||||
|
||||
assert(unsigned(start) >= per_prim_start_dw);
|
||||
unsigned pos_dw = unsigned(start) - per_prim_start_dw;
|
||||
|
||||
prog_data->urb_setup[i] = urb_next + pos_dw / 4;
|
||||
prog_data->urb_setup_channel[i] = pos_dw % 4;
|
||||
}
|
||||
|
||||
urb_next = per_prim_size_dw / 4;
|
||||
} else {
|
||||
/* If fs doesn't need primitive header, then it won't be made
|
||||
* available through SBE_MESH, so we have to skip them when
|
||||
* calculating offset from start of per-prim data.
|
||||
/* With no MUE map, we never read the primitive header, and
|
||||
* per-primitive attributes won't be packed either, so just lay
|
||||
* them in varying order.
|
||||
*/
|
||||
per_prim_start_dw += mue_map->per_primitive_header_size_dw;
|
||||
per_prim_size_dw -= mue_map->per_primitive_header_size_dw;
|
||||
per_prim_inputs_read &= ~primitive_header_bits;
|
||||
|
||||
for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
|
||||
if (per_prim_inputs_read & BITFIELD64_BIT(i)) {
|
||||
prog_data->urb_setup[i] = urb_next++;
|
||||
}
|
||||
}
|
||||
|
||||
/* The actual setup attributes later must be aligned to a full GRF. */
|
||||
urb_next = ALIGN(urb_next, 2);
|
||||
}
|
||||
|
||||
u_foreach_bit64(i, per_prim_inputs_read) {
|
||||
int start = mue_map->start_dw[i];
|
||||
|
||||
assert(start >= 0);
|
||||
assert(mue_map->len_dw[i] > 0);
|
||||
|
||||
assert(unsigned(start) >= per_prim_start_dw);
|
||||
unsigned pos_dw = unsigned(start) - per_prim_start_dw;
|
||||
|
||||
prog_data->urb_setup[i] = urb_next + pos_dw / 4;
|
||||
prog_data->urb_setup_channel[i] = pos_dw % 4;
|
||||
}
|
||||
|
||||
urb_next = per_prim_size_dw / 4;
|
||||
|
||||
prog_data->num_per_primitive_inputs = urb_next;
|
||||
}
|
||||
|
||||
|
|
@ -1842,52 +1860,68 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK;
|
||||
|
||||
if (inputs_read & clip_dist_bits) {
|
||||
assert(mue_map->per_vertex_header_size_dw > 8);
|
||||
assert(!mue_map || mue_map->per_vertex_header_size_dw > 8);
|
||||
unique_fs_attrs &= ~clip_dist_bits;
|
||||
}
|
||||
|
||||
unsigned per_vertex_start_dw = mue_map->per_vertex_start_dw;
|
||||
unsigned per_vertex_size_dw = mue_map->per_vertex_pitch_dw;
|
||||
if (mue_map) {
|
||||
unsigned per_vertex_start_dw = mue_map->per_vertex_start_dw;
|
||||
unsigned per_vertex_size_dw = mue_map->per_vertex_pitch_dw;
|
||||
|
||||
/* Per-Vertex header is available to fragment shader only if there's
|
||||
* user data there.
|
||||
*/
|
||||
if (!mue_map->user_data_in_vertex_header) {
|
||||
per_vertex_start_dw += 8;
|
||||
per_vertex_size_dw -= 8;
|
||||
/* Per-Vertex header is available to fragment shader only if there's
|
||||
* user data there.
|
||||
*/
|
||||
if (!mue_map->user_data_in_vertex_header) {
|
||||
per_vertex_start_dw += 8;
|
||||
per_vertex_size_dw -= 8;
|
||||
}
|
||||
|
||||
/* In Mesh, CLIP_DIST slots are always at the beginning, because
|
||||
* they come from MUE Vertex Header, not Per-Vertex Attributes.
|
||||
*/
|
||||
if (inputs_read & clip_dist_bits) {
|
||||
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next;
|
||||
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next + 1;
|
||||
} else if (mue_map && mue_map->per_vertex_header_size_dw > 8) {
|
||||
/* Clip distances are in MUE, but we are not reading them in FS. */
|
||||
per_vertex_start_dw += 8;
|
||||
per_vertex_size_dw -= 8;
|
||||
}
|
||||
|
||||
/* Per-Vertex attributes are laid out ordered. Because we always link
|
||||
* Mesh and Fragment shaders, the which slots are written and read by
|
||||
* each of them will match. */
|
||||
u_foreach_bit64(i, unique_fs_attrs) {
|
||||
int start = mue_map->start_dw[i];
|
||||
|
||||
assert(start >= 0);
|
||||
assert(mue_map->len_dw[i] > 0);
|
||||
|
||||
assert(unsigned(start) >= per_vertex_start_dw);
|
||||
unsigned pos_dw = unsigned(start) - per_vertex_start_dw;
|
||||
|
||||
prog_data->urb_setup[i] = urb_next + pos_dw / 4;
|
||||
prog_data->urb_setup_channel[i] = pos_dw % 4;
|
||||
}
|
||||
|
||||
urb_next += per_vertex_size_dw / 4;
|
||||
} else {
|
||||
/* If we don't have an MUE map, just lay down the inputs the FS reads
|
||||
* in varying order, as we do for the legacy pipeline.
|
||||
*/
|
||||
if (inputs_read & clip_dist_bits) {
|
||||
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next++;
|
||||
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next++;
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
|
||||
if (unique_fs_attrs & BITFIELD64_BIT(i))
|
||||
prog_data->urb_setup[i] = urb_next++;
|
||||
}
|
||||
}
|
||||
|
||||
/* In Mesh, CLIP_DIST slots are always at the beginning, because
|
||||
* they come from MUE Vertex Header, not Per-Vertex Attributes.
|
||||
*/
|
||||
if (inputs_read & clip_dist_bits) {
|
||||
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next;
|
||||
prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next + 1;
|
||||
} else if (mue_map->per_vertex_header_size_dw > 8) {
|
||||
/* Clip distances are in MUE, but we are not reading them in FS. */
|
||||
per_vertex_start_dw += 8;
|
||||
per_vertex_size_dw -= 8;
|
||||
}
|
||||
|
||||
/* Per-Vertex attributes are laid out ordered. Because we always link
|
||||
* Mesh and Fragment shaders, the which slots are written and read by
|
||||
* each of them will match. */
|
||||
|
||||
u_foreach_bit64(i, unique_fs_attrs) {
|
||||
int start = mue_map->start_dw[i];
|
||||
|
||||
assert(start >= 0);
|
||||
assert(mue_map->len_dw[i] > 0);
|
||||
|
||||
assert(unsigned(start) >= per_vertex_start_dw);
|
||||
unsigned pos_dw = unsigned(start) - per_vertex_start_dw;
|
||||
|
||||
prog_data->urb_setup[i] = urb_next + pos_dw / 4;
|
||||
prog_data->urb_setup_channel[i] = pos_dw % 4;
|
||||
}
|
||||
|
||||
urb_next += per_vertex_size_dw / 4;
|
||||
} else if (devinfo->ver >= 6) {
|
||||
assert(!nir->info.per_primitive_inputs);
|
||||
|
||||
uint64_t vue_header_bits =
|
||||
VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue